# Report setup: load the packages used below and configure knitr and archivist.
library(knitr)
# Chunk defaults: no prefix on printed output, 6x6 figures, results emitted
# as-is, warnings and messages suppressed.
opts_chunk$set(comment=NA, fig.width=6, fig.height=6, results='asis', warning=FALSE, message=FALSE)
library(ggplot2)
library(scales)
library(dplyr)
library(archivist)
# store everything in the archivist repo
# (the local repository is the "arepo" directory; saveToRepo() below writes to it)
setLocalRepo("arepo")
# Summarise a numeric vector by its quartiles, for use as a `fun.data`
# summary in stat_summary().
#
# x: numeric vector; NA values are ignored.
# Returns a named numeric vector with elements "ymin" (25% quantile),
# "y" (median) and "ymax" (75% quantile).
q3 <- function(x) {
  # na.rm = TRUE: quantile() stops with an error on missing values otherwise,
  # and the rest of this script treats birth.rate as possibly containing NAs.
  a <- quantile(x, c(0.25, 0.5, 0.75), na.rm = TRUE)
  names(a) <- c("ymin", "y", "ymax")
  a
}
# Print method override for ggplot objects.
# Saves the plot to the archivist repository, writes a markdown "Load:" link
# built from the returned hash, then hands over to ggplot2's own print method.
#
# x:   the ggplot object being printed.
# ...: forwarded unchanged to ggplot2:::print.ggplot.
print.ggplot <- function(x, ...) {
  plot_hash <- saveToRepo(x)
  load_link <- paste0(
    "Load: [`archivist::aread('pbiecek/Eseje/arepo/", plot_hash,
    "')`](https://github.com/pbiecek/Eseje/raw/master/arepo/gallery/", plot_hash,
    ".rda)\n"
  )
  cat(load_link)
  ggplot2:::print.ggplot(x, ...)
}
W tym skrypcie wykorzystujemy pakiet SmarterPoland oraz zbiory danych `countries` i `maturaExam`.
# Load SmarterPoland and show the first rows of the countries data set.
library(SmarterPoland)
head(countries)
              country birth.rate death.rate population continent
1         Afghanistan       34.1        7.7      30552      Asia
2             Albania       12.9        9.4       3173    Europe
3             Algeria       24.3        5.7      39208    Africa
4             Andorra        8.9        8.4         79    Europe
5              Angola       44.1       13.9      21472    Africa
6 Antigua and Barbuda       16.5        6.8         90  Americas
# Show the first rows of the maturaExam data set.
head(maturaExam)
  podstawowy.matematyka podstawowy.j.polski  rok
1                    19                  35 2010
2                    16                  43 2010
3                    25                  39 2010
4                    27                  35 2010
5                    27                  43 2010
6                    31                  42 2010
# Rows of `countries` holding the lowest / highest birth.rate within each
# continent (NAs ignored when computing the extreme). Both results stay
# grouped by continent.
countriesMin <- filter(
  group_by(countries, continent),
  birth.rate == min(birth.rate, na.rm = TRUE)
)
countriesMax <- filter(
  group_by(countries, continent),
  birth.rate == max(birth.rate, na.rm = TRUE)
)
# Stripped-down theme for the single-layer panels: theme_bw() with white
# vertical major grid lines and zero-sized axis ticks and axis text.
theme_ggplain <- theme_bw() +
  theme(
    panel.grid.major.x = element_line(color = "white"),
    axis.ticks = element_line(size = 0),
    axis.text = element_text(size = 0)
  )
# first example
# Composite plot of birth.rate by continent. Layers, bottom to top:
# grey violins, a red crossbar at the quartiles/median returned by q3,
# horizontally jittered points (shape 15, no vertical jitter), a rug on the
# left side, and blue country labels for the rows in countriesMin (below the
# point) and countriesMax (above the point).
ggplot(countries, aes(x=continent, y=birth.rate, label=country)) +
geom_violin(scale="area", fill="grey", color="white") +
stat_summary(fun.data = "q3", geom = "crossbar",
colour = "red", width = 0.4) +
geom_jitter(position=position_jitter(width = .25, height = 0),
shape=15) +
geom_rug(sides = "l") +
geom_text(data=countriesMin, vjust=2, color="blue3") +
geom_text(data=countriesMax, vjust=-1, color="blue3") +
theme_bw() + xlab("") + theme(panel.grid.major.x = element_line(color="white"))
Load: archivist::aread('pbiecek/Eseje/arepo/e02ad04b68e55035371b3b99112cdf3c')
# Text layers of the first example on their own: only the blue min/max
# country labels, with both axis titles blanked and theme_ggplain applied.
ggplot(countries, aes(x=continent, y=birth.rate, label=country)) +
geom_text(data=countriesMin, vjust=2, color="blue3") +
geom_text(data=countriesMax, vjust=-1, color="blue3") +
theme_bw() + xlab("") + ylab("") + theme_ggplain
Load: archivist::aread('pbiecek/Eseje/arepo/0a276c489dba5d46cd9354148c12e862')
# Violin layer of the first example on its own.
# The min/max labels are still added but drawn in white — presumably to keep
# the panel extent identical to the composite plot while hiding the text
# (TODO confirm intent).
ggplot(countries, aes(x=continent, y=birth.rate, label=country)) +
geom_violin(scale="area", fill="grey", color="white") +
geom_text(data=countriesMin, vjust=2, color="white") +
geom_text(data=countriesMax, vjust=-1, color="white") +
theme_ggplain + xlab("") + ylab("")
Load: archivist::aread('pbiecek/Eseje/arepo/29ee0b9d83f0aac35402ea6b676c31cc')
# Quartile crossbar layer (summary computed by q3) of the first example on
# its own; the white text layers are kept as in the other single-layer panels.
ggplot(countries, aes(x=continent, y=birth.rate, label=country)) +
stat_summary(fun.data = "q3", geom = "crossbar",
colour = "red", width = 0.4) +
geom_text(data=countriesMin, vjust=2, color="white") +
geom_text(data=countriesMax, vjust=-1, color="white") +
theme_ggplain + xlab("") + ylab("")
Load: archivist::aread('pbiecek/Eseje/arepo/5ec69be3c89b05530ac5bb74a00596bd')
# Jittered point layer of the first example on its own (horizontal jitter
# only, shape 15); the white text layers are kept as in the other panels.
ggplot(countries, aes(x=continent, y=birth.rate, label=country)) +
geom_jitter(position=position_jitter(width = .25, height = 0),
shape=15) +
geom_text(data=countriesMin, vjust=2, color="white") +
geom_text(data=countriesMax, vjust=-1, color="white") +
theme_ggplain + xlab("") + ylab("")
Load: archivist::aread('pbiecek/Eseje/arepo/4c632001bd8b1ed9cbc98a59fe3a1256')
# Left-side rug layer of the first example on its own; the white text layers
# are kept as in the other single-layer panels.
ggplot(countries, aes(x=continent, y=birth.rate, label=country)) +
geom_rug(sides = "l") +
geom_text(data=countriesMin, vjust=2, color="white") +
geom_text(data=countriesMax, vjust=-1, color="white") +
theme_ggplain + xlab("") + ylab("")
Load: archivist::aread('pbiecek/Eseje/arepo/bf8ea76ffff69955785a94532683e829')
# second example
# Scatter of death.rate against birth.rate with grey 2D density contours
# (bandwidth h = c(10, 10)), equal axis scaling, the y = x reference line,
# and one observation highlighted as a large red point.
ggplot(countries, aes(x=birth.rate, y=death.rate)) +
geom_density_2d(h=c(10,10), color="grey") +
geom_point() + coord_fixed() +
geom_abline(intercept=0,slope=1) +
# NOTE(review): the highlighted country is selected by hard-coded row index
# 132; which country that is cannot be told from here and it depends on row
# order — consider selecting by country name.
geom_point(data=countries[132,], color="red", size=4) +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/13d5c2cf07e277c07e776473b106a1b0')
# Plain scatter of death.rate against birth.rate with equal axis scaling.
ggplot(countries, aes(x=birth.rate, y=death.rate)) +
geom_point() + coord_fixed() +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/19794b19037daced210ff5c6d485d3be')
# Same scatter with continent mapped to both point colour and point shape;
# the legend is placed above the panel.
ggplot(countries, aes(x=birth.rate, y=death.rate,
color=continent, shape=continent)) +
geom_point() + coord_fixed() +
theme_bw() + theme(legend.position="top")
Load: archivist::aread('pbiecek/Eseje/arepo/4d7b9c309ad4875690da61c194f5ca1f')
# Bin population into five ordered size classes used for point size below.
# The labels pair the break 10^3 with "1M", so population appears to be
# recorded in thousands (NOTE(review): confirm against the data set docs).
# The outer breaks are 0 and Inf (with include.lowest) so that the open-ended
# labels "< 1M" and "> 1 B" really cover every non-negative value; with the
# previous outer breaks 1 and 10^7, values <= 1 or > 10^7 silently became NA
# and were dropped from the plot.
countries$populationCat <- cut(
  countries$population,
  breaks = c(0, 10^3, 10^4, 10^5, 10^6, Inf),
  labels = c("< 1M", "< 10M", "< 100 M", "< 1 B", "> 1 B"),
  include.lowest = TRUE,
  ordered_result = TRUE
)
# Scatter of death.rate against birth.rate: colour and shape by continent,
# point size by population class, legend above the panel.
ggplot(countries, aes(x = birth.rate, y = death.rate,
                      color = continent, shape = continent,
                      size = populationCat)) +
  geom_point() + coord_fixed() +
  theme_bw() + theme(legend.position = "top")
Load: archivist::aread('pbiecek/Eseje/arepo/f7bee75d70a77bd9e1ae66323afe8f6e')
# dotplot
# birth.rate per continent as a dot plot: binned along y (bin width 0.7)
# with the dots stacked symmetrically around each continent's position.
ggplot(countries, aes(x = continent, y = birth.rate)) +
geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 0.7) +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/ccf21a147baebcf2498c7fc44f59e882')
# point (scatter plot of death.rate against birth.rate; this is geom_point,
# not a dot plot)
ggplot(countries, aes(x = birth.rate, y =death.rate)) +
geom_point() +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/4cc6027ddf39539bf985daefae7db7f5')
# jitter
# birth.rate per continent as jittered points (horizontal jitter width 0.2).
# Unlike the first example, height is not set to 0 here, so position_jitter's
# default vertical jitter also applies to birth.rate.
ggplot(countries, aes(x = continent, y =birth.rate)) +
geom_jitter(position = position_jitter(width = .2)) +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/7f97f57921664fcb7d30d927575dcb34')
# various aesthetic mappings
# Continent mapped to point shape; the manual scale uses the letters
# F, A, S, E, O as glyphs, assigned to the continent levels in order.
# The legend is positioned inside the panel at relative coordinates (0.9, 0.17).
ggplot() +
geom_point(data=countries, aes(x = birth.rate, y =death.rate, shape=continent), size=4) +
theme_bw() +
scale_shape_manual(values=c("F","A","S","E","O")) +
theme(legend.position=c(0.9,0.17))
Load: archivist::aread('pbiecek/Eseje/arepo/9603959bdb5fcde4916de25ac635783f')
# Continent mapped to both shape (letter glyphs F, A, S, E, O) and colour;
# legend inside the panel at relative coordinates (0.9, 0.17).
ggplot() +
geom_point(data=countries, aes(x = birth.rate, y =death.rate, shape=continent, color=continent), size=4) +
theme_bw() +
scale_shape_manual(values=c("F","A","S","E","O")) +
theme(legend.position=c(0.9,0.17))
Load: archivist::aread('pbiecek/Eseje/arepo/0b7c872b34944871597cf7ee22a10124')
# Continent mapped to colour only (fixed shape 19), using a qualitative
# ColorBrewer palette (palette index 6); legend inside the panel.
ggplot() +
geom_point(data=countries, aes(x = birth.rate, y =death.rate, color=continent), size=4, shape=19) +
theme_bw() + scale_color_brewer(type = "qual", palette=6) +
theme(legend.position=c(0.9,0.17))
Load: archivist::aread('pbiecek/Eseje/arepo/69e4470a064eca4faf21ecb10a9f9994')
# Population mapped to point size on a square-root-transformed scale with
# comma-formatted labels and limits 0 to 1,500,000; the legend is hidden.
ggplot() +
geom_point(data=countries, aes(x = birth.rate, y =death.rate, size=population)) +
scale_size_continuous(trans="sqrt", label=comma, limits=c(0,1500000)) +
theme_bw() + theme(legend.position="none")
Load: archivist::aread('pbiecek/Eseje/arepo/0a05f31ead54caee9572292f385caed5')
# density
# Overlapping, half-transparent density curves of birth.rate, one per
# continent; legend inside the panel at relative coordinates (0.85, 0.85).
ggplot(countries, aes(x = birth.rate, fill = continent)) +
geom_density(alpha=0.5) +
theme_bw() + theme(legend.position=c(0.85,0.85))
Load: archivist::aread('pbiecek/Eseje/arepo/7929dd727c479b1c939b3d0c5729cdc4')
# Same densities stacked with position = "fill" and without outlines;
# legend above the panel.
ggplot(countries, aes(x = birth.rate, fill = continent)) +
geom_density( position="fill", color=NA) +
theme_bw() + theme(legend.position="top")
Load: archivist::aread('pbiecek/Eseje/arepo/43e30649a593a48c27f143b257851755')
# violin plot
# birth.rate per continent as violins filled by continent; legend hidden.
ggplot(countries, aes(x = continent, y = birth.rate, fill=continent)) +
geom_violin() +
theme_bw() + theme(legend.position="none")
Load: archivist::aread('pbiecek/Eseje/arepo/26d9c58bd0ff95117e1d37aeb47512de')
# ribbon
# Population-weighted mean birth and death rate per continent (NA rates are
# dropped before averaging); input for the ribbon plot that follows.
ndf <- summarise(
  group_by(countries, continent),
  birth.rate = weighted.mean(birth.rate, population, na.rm = TRUE),
  death.rate = weighted.mean(death.rate, population, na.rm = TRUE)
)
# Two ribbons across the continents, each from 0 up to the rate in ndf:
# birth.rate in green, then death.rate in red drawn on top.
# group=1 puts all continents into a single group so one ribbon spans them.
ggplot() +
geom_ribbon(data=ndf, aes(x=continent, ymax=birth.rate, y=birth.rate, ymin=0, group=1), fill="green3") +
geom_ribbon(data=ndf, aes(x=continent, ymax=death.rate, y=death.rate, ymin=0, group=1), fill="red3") +
theme_bw() + xlab("") + ylab("birth.rate / death.rate")
Load: archivist::aread('pbiecek/Eseje/arepo/70366ebb679bdc0d6bee88b9056932b7')
# Per-continent summary: population-weighted mean birth and death rate plus
# total population.
# The population total now ignores NAs, matching the equivalent summary
# further down (conts); without it a single missing population makes the
# total NA and propagates into the cumulative sums used for the
# population-width bars.
continents <- countries %>%
  group_by(continent) %>%
  summarise(
    birth.rate = weighted.mean(birth.rate, w = population, na.rm = TRUE),
    death.rate = weighted.mean(death.rate, w = population, na.rm = TRUE),
    population = sum(population, na.rm = TRUE)
  )
# rect
# Scatter of death.rate against birth.rate with a shaded background band
# covering birth.rate 12.38-27.85 and death.rate 0-18.
# The band is drawn with annotate() so that exactly one rectangle is added:
# a geom_rect() layer with constant coordinates inherits the `countries` data
# and draws one rectangle per row, so the alpha = 0.3 copies stack up to an
# effectively opaque fill.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  annotate("rect", xmin = 12.38, xmax = 27.85, ymin = 0, ymax = 18,
           alpha = 0.3, fill = "grey90") +
  geom_point() +
  theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/19bf57d4e6f50edc4e1a3d057912b044')
# bar
# One bar per continent with height taken directly from the birth.rate
# column of `continents` (stat = "identity", no counting).
ggplot(continents, aes(x = continent, y = birth.rate)) +
geom_bar(stat = "identity") +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/59442be9ea10e26f0b817f6ad0dd0f31')
# Variable-width bars: each continent's bar spans its share of the cumulative
# population on the x axis and reaches its birth.rate on the y axis.
# `cum` is the cumulative population of the preceding continents, i.e. the
# left edge of each bar.
continents2 <- continents %>%
  mutate(cum = cumsum(population) - population)
ggplot() +
  # ymin/ymax were swapped (ymax = 0, ymin = birth.rate); the bar runs from 0
  # up to birth.rate, as in the other rectangle plots of this script.
  geom_rect(data = continents2,
            aes(xmin = cum, xmax = cum + population,
                ymin = 0, ymax = birth.rate, fill = continent)) +
  # Continent name centred horizontally on its bar, just above the top edge.
  geom_text(data = continents2,
            aes(label = continent, x = cum + population / 2, y = birth.rate),
            vjust = -0.1) +
  theme_bw() + theme(legend.position = "none") +
  xlab("population") + ylab("birth.rate")
Load: archivist::aread('pbiecek/Eseje/arepo/519b348ef25c8106739f6554d20efdd0')
# Hand-built paired bars per continent, positioned by the numeric code of the
# continent factor:
#  - green rectangle from 0 up to birth.rate (x offsets -0.1 to +0.2),
#  - red rectangle from birth.rate - death.rate up to birth.rate
#    (x offsets +0.21 to +0.51), so its lower edge marks the difference,
#  - continent name one unit above birth.rate,
#  - a horizontal line at 0 and a left-side rug at birth.rate - death.rate.
# The x axis text and ticks are drawn in white because the numeric positions
# replace the continent names on that axis.
ggplot() +
geom_rect(data=continents, aes(xmin = as.numeric(factor(continent))-0.1,
ymin = 0,
xmax = as.numeric(factor(continent))+0.2,
ymax = birth.rate),
fill="green3") +
geom_rect(data=continents, aes(xmin = as.numeric(factor(continent))+0.21,
ymin = birth.rate - death.rate,
xmax = as.numeric(factor(continent))+0.51,
ymax = birth.rate),
fill="red3") +
geom_text(data=continents, aes(x = as.numeric(factor(continent))+0.21,
y = birth.rate + 1,
label = continent)) +
geom_hline(yintercept=0) + ylab("birth.rate - death.rate") + xlab("") +
geom_rug(data=continents, aes(x = as.numeric(factor(continent))+0.21,
y = birth.rate - death.rate), sides="l") +
theme_bw() + theme(axis.text.x = element_text(color="white"),
axis.ticks.x = element_line(color="white"))
Load: archivist::aread('pbiecek/Eseje/arepo/ddd7f9aa123375309cd1ad9a96700df0')
# number of countries per continent
# geom_bar() with no y aesthetic counts the rows of `countries` in each
# continent; bars are filled by continent and the legend is hidden.
ggplot(countries, aes(x = continent, fill = continent)) +
geom_bar() +
theme_bw() + theme(legend.position="none") + xlab("") + ylab("Liczba krajów")
Load: archivist::aread('pbiecek/Eseje/arepo/7c0e64651d0f94435d6bb9af38289e92')
# line
# death.rate against birth.rate drawn with a single geom_line layer.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
geom_line() +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/c05f80f85c7e7a16ae69ebba0ea0275f')
# smooth
# Smoothed trend of death.rate against birth.rate as a thick black curve with
# no confidence band. The point layer is fully transparent (alpha=0) —
# presumably kept only so the axes span the full data range (TODO confirm).
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
geom_point(color="white", alpha=0) +
geom_smooth(se=FALSE, size=3, color="black") +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/6f9b33857dc61785006596611b013acf')
# Three trend curves over semi-transparent points, without confidence bands:
# a linear fit (degree-1 polynomial, red4), a quadratic fit (degree-2
# polynomial, red3), and a default-method smoother with span 0.5 (red1).
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
geom_point(color="black", alpha=0.3) +
geom_smooth(se=FALSE, size=2, color="red4", method="lm", formula = y~poly(x,1)) +
geom_smooth(se=FALSE, size=2, color="red3", method="lm", formula = y~poly(x,2)) +
geom_smooth(se=FALSE, size=2, color="red1", span=0.5) +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/fccfa878e6600c58e939979081d088b0')
# arrow
# grid supplies arrow() and unit() used for the arrow heads.
library(grid)
# Reorder the country factor levels by birth.rate so countries appear sorted
# on the axis. This modifies `countries` for every later chunk as well.
countries$country <- reorder(countries$country, countries$birth.rate, mean)
# One arrow per European country, from its birth.rate to its death.rate;
# coord_flip() puts the countries on the vertical axis.
ggplot() +
geom_segment(data=countries[countries$continent == "Europe",], aes(x = country, xend = country,
y = birth.rate, yend=death.rate),
arrow = arrow(length = unit(0.1,"cm"))) +
theme_bw() + coord_flip() + theme(legend.position="none") +
ylab("<---- more births more deaths ---->") + xlab("")
Load: archivist::aread('pbiecek/Eseje/arepo/dc15df3d9b42f188b488b5184f2813ae')
# Same arrows coloured by whether death.rate exceeds birth.rate.
# The manual scale lists values/labels in level order FALSE, TRUE:
# green3 = "More births than deaths", red3 = "More deaths than births".
ggplot() +
geom_segment(data=countries[countries$continent == "Europe",], aes(x = country, xend = country,
y = birth.rate, yend=death.rate,
color=death.rate > birth.rate),
arrow = arrow(length = unit(0.1,"cm"))) +
theme_bw() + coord_flip() + theme(legend.position="top") +
ylab("<---- more births more deaths ---->") + xlab("") +
scale_color_manual(values = c("green3", "red3"), labels=c("More births than deaths","More deaths than births"), name="")
Load: archivist::aread('pbiecek/Eseje/arepo/6066f80bf716415d37f4f40cde479193')
# Same arrows with line width mapped to population and closed arrow heads;
# the legend is hidden.
ggplot() +
geom_segment(data=countries[countries$continent == "Europe",], aes(x = country, xend = country,
y = birth.rate, yend=death.rate,
size=population),
arrow = arrow(length = unit(0.1,"cm"), type="closed")) +
theme_bw() + coord_flip() + theme(legend.position="none") +
ylab("<---- more births more deaths ---->") + xlab("")
Load: archivist::aread('pbiecek/Eseje/arepo/a3f4a11791537fd9d3ed8c3c22a825b1')
# Arrows from birth.rate to death.rate for European countries, with the line
# type showing which rate is larger.
# The mapped condition is `death.rate > birth.rate` (the same one the colour
# variant of this plot uses), so the levels are FALSE, TRUE in that order:
# linetype 1 = "More births than deaths", linetype 2 = "More deaths than
# births". The previous condition `birth.rate > death.rate` paired the FALSE
# level with "More births than deaths", i.e. the legend was inverted.
ggplot() +
  geom_segment(data = countries[countries$continent == "Europe", ],
               aes(x = country, xend = country,
                   y = birth.rate, yend = death.rate,
                   linetype = death.rate > birth.rate),
               arrow = arrow(length = unit(0.1, "cm"), type = "closed")) +
  theme_bw() + coord_flip() + theme(legend.position = "top") +
  ylab("<---- more births more deaths ---->") + xlab("") +
  scale_linetype_manual(values = c(1, 2),
                        labels = c("More births than deaths", "More deaths than births"),
                        name = "")
Load: archivist::aread('pbiecek/Eseje/arepo/1bb4e242a49297bf6a54fa64f5680416')
# error bars
# Per-continent range and population-weighted mean of the birth rate
# (bmin, bmax, bmea) and of the death rate (dmin, dmax, dmea), plus total
# population; NAs are ignored throughout.
conts <- summarise(
  group_by(countries, continent),
  bmin = min(birth.rate, na.rm = TRUE),
  bmax = max(birth.rate, na.rm = TRUE),
  bmea = weighted.mean(birth.rate, w = population, na.rm = TRUE),
  dmin = min(death.rate, na.rm = TRUE),
  dmax = max(death.rate, na.rm = TRUE),
  dmea = weighted.mean(death.rate, w = population, na.rm = TRUE),
  population = sum(population, na.rm = TRUE)
)
# One point per continent at its weighted mean birth/death rate, with a
# vertical error bar spanning the min-max death rate; coloured by continent,
# legend hidden.
# xmin/xmax are mapped as well, but the only layer that would use them
# (the horizontal error bar) is commented out below.
ggplot(conts, aes(x = bmea, y = dmea,
ymin = dmin, ymax = dmax,
xmin = bmin, xmax = bmax,
color=continent))+
geom_point() +
geom_errorbar(width=0.5) +
# geom_errorbarh(width=0.5) +
theme_bw() + xlab("birth.rate") + ylab("death.rate") +
theme(legend.position="none")
Load: archivist::aread('pbiecek/Eseje/arepo/2c21b7e6a84f25312c9bc0f02575a273')
# stat_binhex
# geomBinHex.pdf
# Hexagonal binning of death.rate against birth.rate with 9 bins; the fill
# gradient runs from white (low) to black (high).
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
stat_binhex(bins = 9) + scale_fill_gradient(low = "white", high = "black") +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/eed22b09727397c0b8748dc1e254f949')
# geomRug
# geomRug.pdf
# Rug layer only for birth.rate (x) and death.rate (y), using geom_rug's
# default sides.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
geom_rug() +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/d4d0d1b2f1aeadc298e47253987c1975')
# Redefine `continents` as the plain (unweighted) per-continent mean of the
# birth and death rates, ignoring NAs. This replaces the earlier
# population-weighted version and drops its population column.
continents <- summarise(
  group_by(countries, continent),
  birth.rate = mean(birth.rate, na.rm = TRUE),
  death.rate = mean(death.rate, na.rm = TRUE)
)
# text
#geomText.pdf
# Continent names placed at their mean birth/death rate; the x axis is
# limited to 8-35.
ggplot(continents, aes(x = birth.rate, y = death.rate, label = continent)) +
geom_text(alpha=1) +
theme_bw() + xlim(8,35)
Load: archivist::aread('pbiecek/Eseje/arepo/7c1208e511986b6a8ac7b5b3518605d0')
# boxplot
# birth.rate per continent as grey box plots; coef = 3 is passed to
# geom_boxplot as the whisker-length multiplier.
ggplot(countries, aes(x = continent, y = birth.rate)) +
geom_boxplot(fill="grey", coef = 3) +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/de2382fd045555f9409cbce2896e6f8b')
# crossbar
# Grey jittered points per continent with a crossbar summarising each group
# via the "mean_cl_boot" summary function.
ggplot(countries, aes(x = continent, y = birth.rate)) +
geom_jitter(position=position_jitter(width=0.25), color="grey") +
stat_summary(fun.data = "mean_cl_boot", geom = "crossbar", width = 0.3) +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/62c23aba3622949ff60366f3eadcb2c4')
# arrows
library(tidyr)
library(grid)
# Quartiles of birth.rate per continent in wide form: one row per continent
# with columns q1 (25%), q2 (median) and q3 (75%); NAs ignored.
cq <- summarise(
  group_by(countries, continent),
  q1 = quantile(birth.rate, 0.25, na.rm = TRUE),
  q2 = quantile(birth.rate, 0.5, na.rm = TRUE),
  q3 = quantile(birth.rate, 0.75, na.rm = TRUE)
)
# Long form of the same table: one row per continent and quartile, with the
# quartile name in `key` and its value in `value`.
cq13 <- gather(cq, key, value, -continent)
# statQ1.pdf
# Grey jittered birth rates per continent, overlaid with a thick
# double-headed arrow through each continent's quartile values in cq13 and a
# large point at the median (key "q2").
ggplot(cq13, aes(x=continent, y=value, group=continent)) +
geom_jitter(data=countries, aes(x = continent, y = birth.rate), position=position_jitter(width=0.25), color="grey") +
geom_path(arrow=arrow(ends = "both"), size=2) +
geom_point(data=cq13[cq13$key == "q2",], aes(x=continent, y=value), size=5) +
theme_bw() + xlab("") + ylab("Kwartyle i mediana dla wsp. urodzin")
Load: archivist::aread('pbiecek/Eseje/arepo/1aa47530dde226a801bcd493003f9f9a')
# statQ2.pdf
# Box drawn from the precomputed quartiles in cq (stat = "identity").
# ymin/ymax are set to q1/q3 as well, so the whiskers coincide with the box
# edges. The grey jittered points are drawn on top of the boxes.
ggplot(cq, aes(x=continent, y=q2)) +
geom_boxplot(aes(ymin=q1, lower=q1, middle=q2, upper=q3, ymax=q3), stat="identity") +
geom_jitter(data=countries, aes(x = continent, y = birth.rate), position=position_jitter(width=0.25), color="grey") +
theme_bw() + xlab("") + ylab("Kwartyle i mediana dla wsp. urodzin")
Load: archivist::aread('pbiecek/Eseje/arepo/1313e90c0c1ddec5cf9116b8e0a04834')
# statQ3.pdf
# Grey jittered points with a thick error bar from q1 to q3 per continent and
# a large "*" glyph placed at the median (y = q2 from the plot-level mapping).
ggplot(cq, aes(x=continent, y=q2)) +
geom_jitter(data=countries, aes(x = continent, y = birth.rate), position=position_jitter(width=0.25), color="grey") +
geom_errorbar(aes(ymin=q1, ymax=q3), stat="identity", width=0.3, size=2) +
geom_text(label="*", size=25) +
theme_bw() + xlab("") + ylab("Kwartyle i mediana dla wsp. urodzin")
Load: archivist::aread('pbiecek/Eseje/arepo/2fec71682ef4f75928d6e213b3abc686')
# contour
# Grey 2D density contours of death.rate against birth.rate (bandwidth
# h = c(10, 10)) on fixed-ratio axes limited to x 0-50 and y 0-16.
ggplot(countries, aes(x=birth.rate, y=death.rate)) +
coord_fixed() + xlim(0,50) + ylim(0,16)+
geom_density2d(h=c(10,10), color="grey") +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/d04db533e2252ce6af66a11ee2c00c27')
# Same density estimate drawn as semi-transparent filled polygons
# (alpha 0.25) instead of contour lines.
ggplot(countries, aes(x=birth.rate, y=death.rate)) +
coord_fixed() + xlim(0,50) + ylim(0,16)+
stat_density2d(h=c(10,10), geom="polygon", alpha=0.25) +
theme_bw()
Load: archivist::aread('pbiecek/Eseje/arepo/0c77057670bfe2e0510d2b454bd27f15')
# Same density estimate drawn as tiles (contour = FALSE), with the computed
# density mapped to a white-to-black fill gradient.
ggplot(countries, aes(x=birth.rate, y=death.rate)) +
coord_fixed() + xlim(0,50) + ylim(0,16)+
stat_density2d(h=c(10,10), geom="tile", aes(fill = ..density..), contour = FALSE) + scale_fill_gradient(low="white", high="black") +
theme_minimal()
Load: archivist::aread('pbiecek/Eseje/arepo/3ecbf4864e8e78a32659990785ef9e57')
# Same density estimate evaluated on a 50 x 15 grid (n = c(50, 15)) and drawn
# as points whose size encodes the computed density (size range 0-2).
ggplot(countries, aes(x=birth.rate, y=death.rate)) +
coord_fixed() + xlim(0,50) + ylim(0,16)+
stat_density2d(h=c(10,10), n=c(50,15), geom="point", aes(size = ..density..), contour = FALSE) + scale_size_continuous(range=c(0,2)) +
theme_minimal()
Load: archivist::aread('pbiecek/Eseje/arepo/4becd71bfca1970acc67bcf0b7f3bdb5')